#############################################################
##                                                         ##
##                    Replication Code for 				   ##
## Survival Analysis: A New Guide for Social Scientists    ##
##						CHAPTERS 2-4 					   ##
##		by Alejandro Quiroz Flores (University of Essex)   ##
##                                                         ## #############################################################

#############################################################
##			Code run in R 3.5.0 in MacOS 10.15.7		   ##
## Necessary packages listed below are installed at system ##
## 					level (in R framework) 				   ## 
## 		Please install necessary package dependencies 	   ##
#############################################################

############################################################# ##                                                         ## ##                    Load Packages                        ## ##                                                         ## #############################################################

library(foreign)
library(survival)
library(mstate)
library(mvna)
library(lattice)
library(cmprsk)
library(etm)
library(simPH)
library(coxme)
library(texreg)
library(ggplot2)

###################################################
##            Set seed for replication           ##
###################################################

set.seed(7777777)

###################################################
##         Set working directory for data        ##
###################################################

setwd("/Users/alex/Dropbox/Papers Essex/CUP Book/Manuscript/Manuscript_CUP_preamble/Submission/Resubmission")

###################################################
## Democracies Reversal Data from Maeda (2010)   ##
###################################################

#The data is from Stata. The data here includes Maeda's original data and an additional variable that creates elapsed time for democratic spells

maeda <- read.dta(file="Maeda2010JOP_TwoModes_with_elapt_olddems.dta")

#Look at the variable names, class of the dataset, and attach the data

names(maeda)
class(maeda)
attach(maeda)

#Create variables with familiar names

#Duration of democratic spell
t<-demmonth

#Duration of democratic spell-1 for start-stop counting format, necessary for time-varying covariates
t0<-demmonth-1

#Censoring variable, including correct replacement of values for right-censored cases
d<-replace(demend, demend==9,0)
table(d)
table(endmode)

#Competing risks variable with destinations. Note that this provides the same information as variable "d", but it must set as a factor

status<-as.factor(endmode)
class(status)

#Elapsed duration of democratic spell-1 for start-stop counting format, necessary for time-varying covariates. This will be used in repeated events models
elapt0<-elapt-1

#ID variable for the democratic spell
id<-demid

#Additional variables necessary for multi-state models

#Origin state
from<-replace(d, d==1,0)

#Target state. Note that the censored cases must be coded as the string "cens"
to<-as.factor(ifelse(endmode==0,"cens",endmode))
class(to)

#Rename duration
time<-t

#Join the new variables with Madea's data
maeda<-cbind(t0, t, d, elapt0, id, from, to, time, status, maeda)


############################################################# ##                                                         ## ##                 CHAPTER 2 PROPORTIONALITY               ## ##                                                         ## #############################################################

#Always good to inspect the survival object and check correct number of falures

#Survival object uses uses a (start,stop] counting format with a numerical censoring indicator "d" 
survobj.mrec<-Surv(t0,t,d, type="counting", origin=0)

#Check survival object
survcheck(survobj.mrec ~1, id=demid, data=maeda)

################### TABLE 2.2 #####################

#This is a traditional Cox mortality model
cox1 <- coxph(Surv(t0,t,d, type="counting", origin=0) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem, data= maeda, ties="efron", id=demid, cluster=country, robust=T, x=T)
summary(cox1)

#Log-lokelihood of the model
cox1$loglik

################### TABLE 2.3 #####################

# These are Grambsch And Therneau  tests for non-proportionality. The code uses different time scales for the test, which is indicated by the parameter 'transform'

phtest.id<-cox.zph(cox1,transform="identity", global=T)
phtest.id

phtest.rnk<-cox.zph(cox1,transform="rank", global=T)
phtest.rnk

phtest.km<-cox.zph(cox1,transform="km", global=T)
phtest.km

################### TABLE 2.4 #####################

#The tests indicate that Ethnic Fragmentation is not proportional, so it must be corrected with an interaction with the log of duration
lnt<-log(t)
tethnic2 <-lnt*ethnic
maeda<-cbind(maeda, tethnic2)

#This is the corrected model, which is also a traditional mortality model
cox2new <- coxph(Surv(t0,t,d, type="counting", origin=0) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem + tethnic2, data= maeda, ties="efron", id= demid, cluster=country, robust=T, x=T)
summary(cox2new)
cox2new$loglik

################### FIGURE 2.1 ####################

#This code reproduces Maeda's procedure to calculate the conditional linear coefficient for a non-proportional covariate, in this case, Ethnic Fragmentation. Note that Maeda corrects a different covariate

#The code extracts relevant point estimates from estimation results
cox2new$coefficients
cox2new$coefficients[6]
cox2new$coefficients[14]
cox2new$var

#This codes creates the time axis
sim.t<-seq(1,2350,1)
sim.lnt<-log(seq(1,2350,1))

#This codes create the linear combination of point estimates
preslope<-cox2new$coefficients[6]+cox2new$coefficients[14]*sim.lnt

#This codes uses the Delta method to produce the standard error of the linear combination and creates confidence intervals
prese<-sqrt(cox2new$var[6,6]+(cox2new$var[14,14]*sim.lnt^2)+(2*cox2new$var[14,6]*sim.lnt))
preup<-preslope+(1.96* prese)
predn <-preslope-(1.96* prese)

#The following produces a plot
plot(sim.t, preslope, ylab="Conditional Linear Coefficient",xlab="Months", xlim=c(1,300),col="purple4", type="l", lwd=3, cex.lab=1.2)
abline(0,0, col="black", lwd=2)
lines(sim.t, preup, col="deeppink", lty=2,lwd=3, xlim=c(1,300))
lines(sim.t, predn, col="deeppink", lty=2, lwd=3, xlim=c(1,300))

#The following produces a ggplot for the book
fig21data<-data.frame(cbind(sim.t,preslope,preup, predn))

ggplot(data=fig21data, mapping=aes(x=sim.t,y=preslope))+geom_line(color="purple4",size=1)+theme_bw()+labs(x="Months", y="Conditional Linear Coefficient")+geom_ribbon(data=fig21data, mapping=aes(ymin=predn,ymax=preup),alpha=.3,fill="plum2")+xlim(0,300)+ylim(-10,7.5)+theme(text=element_text(size=20),axis.text=element_text(size=20),legend.text=element_text(size=20))


################### FIGURE 2.2 ####################

#This code uses Gandrud's (2015) method in the simPH package to produce simulations of quantities of interest after estimation

#This is the simulation
sim.cox2new <-coxsimtvc(obj= cox2new, b="ethnic", btvc="tethnic2", qi="First Difference", Xj=1, tfun="log",from=0, to=100,by=10)

#This is the figure
simGG(sim.cox2new, type="ribbons", lsize=1,legend=F,alpha=.3,xlab="Months", cex.lab=9/10)+theme(text=element_text(size=20),axis.text=element_text(size=20),legend.text=element_text(size=20))



############################################################# ##                                                         ## ##                   CHAPTER 3 REPEATED EVENTS             ## ##                                                         ## #############################################################


#Always good to inspect the survival object and check correct number of failures

#Note that the ID has now changed, as we need to consider repeated spells of democracy for the same country

survobj.mrec3 <-Surv(elapt0,elapt,d, type="counting", origin=0)
survcheck(survobj.mrec3 ~1, id=ccode, data=maeda)

################### STRATIFICATION ################

#This code creates a stratification variable for each spell of democracy, as multiple countries have multiple spells. Essentially, it counts the number of democratic spells by country. This will be necessary for models that stratify the hazard rate, such as the conditional model

str<-demid
str<-replace(str,str>1,1)
str<-replace(str,ccode==160	& demid==3,2)
str<-replace(str,ccode==771   & demid==9,2)
str<-replace(str,ccode==140   & demid==18,2)
str<-replace(str,ccode==140	& demid==19,3)
str<-replace(str,ccode==155	& demid==23,2)
str<-replace(str,ccode==352	& demid==30,2)
str<-replace(str,ccode==315	& demid==34,2)
str<-replace(str,ccode==42	& demid==38,2)
str<-replace(str,ccode==42	& demid==39,3)
str<-replace(str,ccode==950	& demid==47,2)
str<-replace(str,ccode==950	& demid==48,3)
str<-replace(str,ccode==220	& demid==54,2)
str<-replace(str,ccode==452	& demid==60,2)
str<-replace(str,ccode==350	& demid==64,2)
str<-replace(str,ccode==41	& demid==68,2)
str<-replace(str,ccode==91	& demid==70,2)
str<-replace(str,ccode==732	& demid==81,2)
str<-replace(str,ccode==570	& demid==86,2)
str<-replace(str,ccode==570	& demid==87,3)
str<-replace(str,ccode==553	& demid==92,2)
str<-replace(str,ccode==475	& demid==110,2)
str<-replace(str,ccode==770	& demid==115,2)
str<-replace(str,ccode==135	& demid==120,2)
str<-replace(str,ccode==365	& demid==128,2)
str<-replace(str,ccode==940	& demid==135,2)
str<-replace(str,ccode==780	& demid==142,2)
str<-replace(str,ccode==625	& demid==144,2)
str<-replace(str,ccode==625	& demid==145,3)
str<-replace(str,ccode==640	& demid==153,2)
str<-replace(str,ccode==640	& demid==154,3)
str<-replace(str,ccode==640	& demid==155,4)
str<-replace(str,ccode==369	& demid==158,2)
str<-replace(str,ccode==165	& demid==162,2)
min(str)
max(str)

maeda<-cbind(maeda,str)

###################################################
################### TABLE 3.2 #####################
###################################################

############## Independent Increments #############
 
cox.ag <- coxph(Surv(elapt0,elapt,d, type="counting", origin=0) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem + tethnic2, data= maeda, ties="efron", id=ccode, cluster=ccode, robust=T, x=T)
summary(cox.ag)
cox.ag$loglik

################### Conditional ###################

cox.cond <- coxph(Surv(elapt0,elapt,d, type="counting", origin=0) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem + tethnic2 + strata(str), data= maeda, ties="efron", id=ccode, cluster=ccode, robust=T, x=T)
summary(cox.cond)
cox.cond$loglik

################### FIGURE 3.3 ####################

#In order to produce predictions, we must specify which cases we want predictions for. Often, this aspect of the simulation is neglected, but it is crucial to set up values of time varying-covariates, as well as IDs and all necessary variables used in estimation. The default will use the means of relevant covariates, but this is not necessarily correct. Therefore, this code takes the covariate values for 4 different countries, each with different numbers of transitions, and produces cumulative hazard functions. The censoring indicators do not make a difference for prediction, but for consistency they are set to 0.

#Mexico
mex.rep<-subset(maeda,ccode==70,select=c(ccode,elapt0,elapt,d,dev,gro,presi,mixed,majgovm,ethnic,ope,urb,postcw,impose, prereg_ind, prereg_mil, regiondem, tethnic2,str))
mex.rep$d<-replace(mex.rep$d, mex.rep$d>0, 0)

#Argentina
arg.rep<-subset(maeda,ccode==160,select=c(ccode,elapt0,elapt,d,dev,gro,presi,mixed,majgovm,ethnic,ope,urb,postcw,impose, prereg_ind, prereg_mil, regiondem, tethnic2,str))
arg.rep$d<-replace(arg.rep$d, arg.rep$d>0, 0)

#Sudan
sud.rep<-subset(maeda,ccode==625,select=c(ccode,elapt0,elapt,d,dev,gro,presi,mixed,majgovm,ethnic,ope,urb,postcw,impose, prereg_ind, prereg_mil, regiondem, tethnic2,str))
sud.rep$d<-replace(sud.rep$d, sud.rep$d>0, 0)

#Turkey
tur.rep<-subset(maeda,ccode==640,select=c(ccode,elapt0,elapt,d,dev,gro,presi,mixed,majgovm,ethnic,ope,urb,postcw,impose, prereg_ind, prereg_mil, regiondem, tethnic2,str))
tur.rep$d<-replace(tur.rep$d, tur.rep$d>0, 0)

#Join the data
four<-rbind(mex.rep, arg.rep, sud.rep, tur.rep)

#The prediction is produced by the following code for the data above
plot.cox.cond.all<-survfit(cox.cond, newdata=four,id=ccode,se.fit=F)
dim(plot.cox.cond.all)
summary(plot.cox.cond.all)

#The following produces a plot
plot(plot.cox.cond.all,cumhaz=T, xlab="Months", ylab="A(t)",ylim=c(0,1.1),xlim=c(0,150),lty=c(1,2,3,4),col=c("purple4","deeppink","blue", "darkgoldenrod"),lwd=c(3,3,3,3), cex.lab=1.2)
legend(39,.7,c("Mexico: 1 Democratic Spell","Argentina: 2 Democratic Spells","Sudan: 3 Democratic Spells","Turkey: 4 Democratic Spells"), cex=1,col=c("purple4","deeppink","blue", "darkgoldenrod"),lwd=c(3,3,3,3),lty=c(1,2,3,4))

#The following produces a ggplot for the book
plot.cox.cond.all$strata
fig33cumhaz<-plot.cox.cond.all$cumhaz
fig33time<-plot.cox.cond.all$time
f33data<-data.frame(fig33time,fig33cumhaz, Country=c(rep("Mexico",90),rep("Argentina",292),rep("Sudan",112),rep("Turkey",481)))

ggplot(data=f33data,aes(x=fig33time,y=fig33cumhaz,color=Country,linetype=Country))+geom_step(direction="hv",size=1)+theme_bw()+labs(x="Months", y="A(t)")+xlim(0,150)+ylim(0,1)+scale_color_manual(values=c("deeppink","purple4","blue2","goldenrod4"))+scale_linetype_manual(values=c(2,1,3,4))+theme(text=element_text(size=20),axis.text=element_text(size=20),legend.text=element_text(size=20))



############### Conditional Frailty ###############

cox.condfrail <- coxph(Surv(elapt0,elapt,d, type="counting", origin=0) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem + tethnic2 + strata(str) + frailty(ccode,distribution="gamma",method="em"), data= maeda, ties="efron")
summary(cox.condfrail)

#Obtain likelihoods for conditional fraility and for conditional
cox.condfrail$history[[1]]$c.loglik
cox.cond$loglik[[2]]

#Obtain frailty 
vartheta<-cox.condfrail$history[[1]]$theta
vartheta

############ Mixed Cox survival model #############

cox.re <- coxme(Surv(elapt0,elapt,d, type="counting", origin=0) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem + tethnic2 + (1|ccode), data= maeda, ties="efron")
print(cox.re)
cox.re$loglik

#This is the null model necessary to compare the effect of the random effects
cox.re.null <- coxph(Surv(elapt0,elapt,d, type="counting", origin=0) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem + tethnic2, data= maeda, ties="efron")
print(cox.re.null)
cox.re.null$loglik

#Test of the difference between random effects and no effects
anova(cox.re.null,cox.re)

############################################################# ##                                                         ## ##                CHAPTER 4 COMPETING RISKS                ## ##                                                         ## #############################################################

#Always good to inspect the survival object and check correct number of failures

#Note that the ID has now changed to the original format of democractic spells

#Note that the censoring indicator must now be set as a factor. Technically, this makes the model a multi-state model from the persoective of the package 'survival' 

survobj.mrec.comp <-Surv(t0,t,status, type="counting", origin=0)
survcheck(survobj.mrec.comp ~1, id=demid, data=maeda)

################### FIGURE 4.2 ####################

#This code produces the empirical Aalen-Johansen estimator

curves.mrec.comp<-survfit(Surv(t0,t,status, type="counting", origin=0) ~ 1,id=demid,data=maeda)
print(curves.mrec.comp)
curves.mrec.comp$transitions
summary(curves.mrec.comp)

#The following produces a plot
plot(curves.mrec.comp,xlab="Months", ylab="Probability in State",ylim=c(0,.3),xlim=c(0,600),lty=1:2,col=c("purple4","deeppink"),lwd=3, cex.lab=1.2)
legend(290,.125,c("Exogenous Termination","Endogenous Termination"), cex=1,col=c("purple4","deeppink"),lwd=3,lty=1:2)

#The following produces a ggplot for the book
summary(curves.mrec.comp)
curves.mrec.comp$time
fig42time<-rep(curves.mrec.comp$time,2)
curves.mrec.comp$pstate
fig42ps1<-curves.mrec.comp$pstate[,2]
fig42ps2<-curves.mrec.comp$pstate[,3]
f42allp<-c(fig42ps1,fig42ps2)
f42data<-data.frame(fig42time,f42allp,Termination=c(rep("Exogenous",123),rep("Endogenous",123)))

ggplot(data=f42data,aes(x=fig42time,y=f42allp,color= Termination,linetype=Termination))+geom_step(direction="hv",size=1)+theme_bw()+labs(x="Months", y="Probability in State")+xlim(0,600)+ylim(0,.3)+scale_color_manual(values=c("deeppink","purple4"))+scale_linetype_manual(values=c(2,1))+theme(text=element_text(size=20),axis.text=element_text(size=20),legend.text=element_text(size=20))


################### TABLE 4.1 #####################

#This code produces Madea's original competing risks model
 
cox3and5 <- coxph(Surv(t0,t,status, type="counting", origin=0) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem, data= maeda, ties="efron", id= demid, cluster=country, robust=T, x=T)
print(cox3and5)

#It is essential to test for non-proportionality
phtest.id.cr<-cox.zph(cox3and5,transform="identity", global=T)
phtest.id.cr
phtest.rnk.cr<-cox.zph(cox3and5,transform="rank", global=T)
phtest.rnk.cr
phtest.km.cr<-cox.zph(cox3and5,transform="km", global=T)
phtest.km.cr

#And then correct the offending covariates
tgro2 <-lnt*gro
mean(tethnic2)
tope2 <-lnt*ope
tpostcw2 <-lnt*postcw
tpresi2 <-lnt*presi

maeda<-cbind(maeda, tgro2, tope2, tpostcw2, tpresi2)

#This code estimates the corrected model. It includes additional code for corrected variables in each destination

#It is important to note that models for both destinations are estimated jointly. This does not make a difference for hazard rate analysis, but it is necessary for the computation of cumulative incidence functions 
 
corr.cox3and5 <- coxph(list(Surv(t0,t,status, type="counting", origin=0) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem, 
1:2 ~ tgro2 + tethnic2 + tope2 + tpostcw2,
1:3 ~ tpresi2),
data= maeda, ties="efron", id=demid, cluster=country, robust=T, x=T)
print(corr.cox3and5)

#The number of observations most be calculated manually
30367-1899

################### FIGURE 4.3 ####################

#The code above, and particularly "status" as a factor, turns the competing risks model into a multi-state model. At the time of writing, multi-state models are not yet ready to produce predictions with time-varying covariates and therefore the code below chooses a particular observation in time for the prediction of the cumulative incidence function  

#Data for Argentina in March 1976
arg1976.cr<-data.frame(
				t0 = 36,
				t = 37,
				status = 0, #necessary, but ignored
				dev = 9.25,
				gro = -2.1, 
				presi = 1, 
				mixed = 0, 
				majgovm = 1,  
				ethnic = .255,  
				ope = 6.76,  
				urb = 81,  
				postcw = 0,  
				impose = 0, 
				prereg_ind = 0,  
				prereg_mil = 1,
				regiondem = -.8,
				tgro2 = -7.59,
				tethnic2 = .92,
				tope2 = 24.43,
				tpostcw2 = 0,
				tpresi2 = 3.61)
arg1976.cr

#This code produces the simulation
plot.corr.cox3and5 <-survfit(corr.cox3and5,newdata= arg1976.cr,se.fit=T) 
dim(plot.corr.cox3and5)
summary(plot.corr.cox3and5)

#The following produces a plot
plot(plot.corr.cox3and5[,,2],xlab="Months", ylab="Probability in State",ylim=c(0,.15),xlim=c(0,600),lty=1,col="purple4",lwd=3, , cex.lab=1.2)
lines(plot.corr.cox3and5[,,3],xlab="Months",ylim=c(0,.15),xlim=c(0,600),lty=2,col="deeppink",lwd=3)
legend(250,.07,c("Exogenous Termination","Endogenous Termination"), cex=1,col=c("purple4","deeppink"),lwd=3,lty=1:2)

#The following produces a ggplot for the book
summary(plot.corr.cox3and5)
plot.corr.cox3and5$time
fig43time<-rep(plot.corr.cox3and5$time,2)
plot.corr.cox3and5$pstate
fig43ps1<-plot.corr.cox3and5$pstate[,,2]
fig43ps2<-plot.corr.cox3and5$pstate[,,3]
f43allp<-c(fig43ps1,fig43ps2)
f43data<-data.frame(fig43time,f43allp,Termination=c(rep("Exogenous",129),rep("Endogenous",129)))

ggplot(data=f43data,aes(x=fig43time,y=f43allp,color=Termination,linetype=Termination))+geom_step(direction="hv",size=1)+theme_bw()+labs(x="Months", y="Probability in State")+xlim(0,600)+ylim(0,.15)+scale_color_manual(values=c("deeppink","purple4"))+scale_linetype_manual(values=c(2,1))+theme(text=element_text(size=19),axis.text=element_text(size=19),legend.text=element_text(size=19))



################### TABLE 4.2 #####################

#The command 'finegray' creates a new dataset for each destination where units that did not experience the event of interest become censored and their durations are artificially extended with a decreasing weight from interval to interval

class(status)
table(status)

fg1<-finegray(Surv(t,status)~dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem +  regiondem + demid,id=demid,data=maeda,etype="1",na.action=na.omit,count="count")

#It is worth observing the weights for the artificial durations
head(fg1,400)

fg2<-finegray(Surv(t,status)~dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem +  regiondem + demid,id=demid,data=maeda,etype="2",na.action=na.omit,count="count")

#It is worth observing the weights for the artificial durations
head(fg2,400)

#The output produces the relevant data, but does not create the correct start-stop times, which need to be adjusted as follows

cor.fg1<-fg1
names(cor.fg1)
cor.fg1$fgstart<-ifelse(cor.fg1$count==0,cor.fg1$fgstop-1,cor.fg1$fgstart)
head(cor.fg1,400)

cor.fg2<-fg2
names(cor.fg2)
cor.fg2$fgstart<-ifelse(cor.fg2$count==0,cor.fg2$fgstop-1,cor.fg2$fgstart)
head(cor.fg2,400)

############## DATA SET 1: Exogenous ##############

cox.fg1<-coxph(Surv(fgstart,fgstop,fgstatus) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem +  regiondem, data= cor.fg1, ties="efron", weight=fgwt,id= demid, robust=T, x=T)
print(cox.fg1)
cox.fg1$loglik

#It is essential to test for non-proportionality
phtest.id.fg1<-cox.zph(cox.fg1,transform="identity", global=T)
phtest.id.fg1
phtest.rnk.fg1<-cox.zph(cox.fg1,transform="rank", global=T)
phtest.rnk.fg1
phtest.km.fg1<-cox.zph(cox.fg1,transform="km", global=T)
phtest.km.fg1

#And then correct the offending covariates
names(cor.fg1)
lnfgstop1 <-log(cor.fg1$fgstop)
cor.fg1 <-cbind(cor.fg1, lnfgstop1)
tprereg_ind2 <-cor.fg1$lnfgstop1*cor.fg1$prereg_ind
cor.fg1 <-cbind(cor.fg1, tprereg_ind2)
names(cor.fg1)

#This code estimates the corrected model 
cor.cox.fg1<-coxph(Surv(fgstart,fgstop,fgstatus) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem +  tprereg_ind2, data= cor.fg1, ties="efron", weight=fgwt,id= demid, robust=T, x=T)
print(cor.cox.fg1)
cor.cox.fg1$loglik

############## DATA SET 2: Endogenous #############

cox.fg2<-coxph(Surv(fgstart,fgstop,fgstatus) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem, data= cor.fg2, ties="efron", weight=fgwt,id= demid, robust=T, x=T)
print(cox.fg2)
cox.fg2$loglik

#It is essential to test for non-proportionality
phtest.id.fg2<-cox.zph(cox.fg2,transform="identity", global=T)
phtest.id.fg2
phtest.rnk.fg2<-cox.zph(cox.fg2,transform="rank", global=T)
phtest.rnk.fg2
phtest.km.fg2<-cox.zph(cox.fg2,transform="km", global=T)
phtest.km.fg2

#And then correct the offending covariates
names(cor.fg2)
lnfgstop2 <-log(cor.fg2$fgstop)
cor.fg2 <-cbind(cor.fg2, lnfgstop2)
tpresi2.fg <-cor.fg2$lnfgstop2*cor.fg2$presi
cor.fg2 <-cbind(cor.fg2, tpresi2.fg)
names(cor.fg2)

#This code estimates the corrected model. 
cor.cox.fg2<-coxph(Surv(fgstart,fgstop,fgstatus) ~ dev + gro +  presi +  mixed +  majgovm +  ethnic +  ope +  urb +  postcw +  impose + prereg_ind +  prereg_mil +  regiondem + tpresi2.fg, data= cor.fg2, ties="efron", weight=fgwt,id= demid, robust=T, x=T)
print(cor.cox.fg2)
cor.cox.fg2$loglik


#END


